Updates to Machine Learning for Pediatric Obesity

08 March 2018

Accomplished since last meeting

  • Another 1900 addresses were geocoded
  • Relative coordinates were converted to x,y pairs and a model ready data set was created
  • Progressive time models completed
  • High risk kids (9 boys and 7 girls) from the sample models were extracted to get their growth curves
  • Sample figures for the paper are created
  • A function to create table outputs for summary statistics of key features is created
  • Accuracy comparisons of the best models against Mary Jo's original and updated inclusion criteria

BMI Percentile Distribution for All Children in the Cohort

In [4]:
# Grouped bar chart of the BMI-percentile bin counts, boys next to girls.
# Reads bins, bin_names, boys_bins, girls_bins, boys and girls, none of
# which are defined in this cell.
fig, ax = plt.subplots(figsize=(12,8))
index = np.arange(len(bins))
bar_width = 0.4
opacity = 0.8

# The legend labels are raw strings so that the mathtext "\mu" no longer
# depends on Python passing the unrecognised escape "\m" through unchanged
# (which emits an invalid-escape-sequence warning).  The rendered text is
# the same as before.
bar1 = plt.bar(index, boys_bins, bar_width, alpha=opacity,
               label=r'Boys (n={0:,.0f}; $\mu$={1:,.2f})'.format(np.sum(boys_bins), np.mean(boys)))

# The girls' bars are shifted right by one bar width so they sit beside the boys' bars.
bar2 = plt.bar(index + bar_width, girls_bins, bar_width, alpha=opacity,
               label=r'Girls (n={0:,.0f}; $\mu$={1:,.2f})'.format(np.sum(girls_bins), np.mean(girls)))

plt.ylabel('Number of Children', fontsize=14)
plt.title('BMI Percentile Distribution at 4.5 to 5.5 years - No Exclusions', fontsize=20)
plt.xticks(index, bin_names, rotation=30)
ax.yaxis.grid(linestyle='--')
ax.set_axisbelow(True)  # keep the grid lines behind the bars
plt.legend(fontsize=12)

plt.tight_layout()
# plt.savefig('../outputs_age_analyses20180221/no_exclusions_bmi_percentiles.png', dpi=96)
plt.show()

Growth Curves for the 16 Highest Risk Kids in the Data Set

In [7]:
# Plot one growth curve per entry keys[0] .. keys[15], passing hide_mrn=True
# on every call.  This single loop replaces sixteen copy-pasted one-line
# cells that differed only in the index into keys.
# NOTE(review): assumes plot_growth_curve draws each curve on its own
# figure, as it did when called once per cell -- confirm.
for position in range(16):
    train.plot_growth_curve(d1, None, keys[position], hide_mrn=True)

Results for the most predictive feature: 'Vital: BMI-latest'

In [24]:
# ROC curves for every model whose title contains modelix, leaving out the
# titles tagged 'randomforest' or 'gradientboost'.  Titles containing
# 'girls' are drawn solid, all remaining titles dashed.
modelix = 'BMI'
skipped_models = ('randomforest', 'gradientboost')

plt.figure(figsize=(9,9))
for ix in range(len(prec_total)):
    model_title = titles_total[ix]
    if modelix in model_title and not any(tag in model_title for tag in skipped_models):
        one_minus_specificity = 1 - np.array(spec_total[ix])
        sensitivity = np.array(recall_total[ix])
        dash = '-' if 'girls' in model_title else '--'
        plt.plot(one_minus_specificity, sensitivity, linestyle=dash,
                 label=model_title + ' - AUC={:0.2f}'.format(auc_list[ix][0]))

plt.title('ROC Curve: Obesity Predicted at 5 years - "Vital: BMI-latest"', fontsize=14)
plt.xlabel('1 - Specificity', fontsize=12)
plt.ylabel('Sensitivity', fontsize=12)
plt.legend(fontsize = 10)
plt.axis('equal')
plt.grid(True)
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_ROC.png', dpi=300)
plt.show()
In [25]:
# Precision-recall curves for the same selection as the ROC cell above:
# titles that contain modelix (assigned in that cell) and are not tagged
# 'randomforest' or 'gradientboost'.  'girls' titles are solid, the rest dashed.
plt.figure(figsize=(9,9))
for ix, precision in enumerate(prec_total):
    model_title = titles_total[ix]
    if modelix not in model_title:
        continue
    if 'randomforest' in model_title or 'gradientboost' in model_title:
        continue
    if 'girls' in model_title:
        dash = '-'
    else:
        dash = '--'
    curve_label = model_title + ' - AUC={:0.2f}'.format(auc_list[ix][0])
    plt.plot(recall_total[ix], precision, linestyle=dash, label=curve_label)

plt.title('Precision-Recall Curve: Obesity Predicted at 5 years - "Vital: BMI-latest"', fontsize=14)
plt.xlabel('Recall (Sensitivity)', fontsize=14)
plt.ylabel('Precision (PPV)', fontsize=14)
plt.legend(fontsize = 8)
plt.axis('equal')
plt.grid()
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()

LASSO and Random Forest Comparison for Boys and Girls

In [26]:
# ROC curves for the models that remain once every title carrying one of
# the tags in dropped_tags has been filtered out.  'girls' titles are drawn
# solid, all remaining titles dashed.
dropped_tags = ('w/o vitals','Wt','no_maternal','w/o exclusions', 'BMI','gradientboost')

plt.figure(figsize=(9,9))
for ix in range(len(prec_total)):
    model_title = titles_total[ix]
    if not any(tag in model_title for tag in dropped_tags):
        dash = '-' if 'girls' in model_title else '--'
        curve_label = model_title + ' - AUC={:0.2f}'.format(auc_list[ix][0])
        plt.plot(1 - np.array(spec_total[ix]), np.array(recall_total[ix]),
                 linestyle=dash, label=curve_label)

plt.title('ROC Curve: Obesity Predicted at 5 years', fontsize = 18)
plt.xlabel('1 - Specificity', fontsize = 14)
plt.ylabel('Sensitivity', fontsize = 14)
plt.legend(fontsize = 9)
plt.axis('equal')
plt.grid(True)
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_ROC.png', dpi=300)
plt.show()
In [27]:
# Precision-recall curves for the models whose title carries none of the
# tags in dropped_tags.  'girls' titles are solid, the rest dashed.
dropped_tags = ('w/o vitals','Wt','no_maternal','w/o exclusions', 'BMI','gradientboost')

plt.figure(figsize=(9,9))
for ix, precision in enumerate(prec_total):
    model_title = titles_total[ix]
    if any(tag in model_title for tag in dropped_tags):
        continue
    if 'girls' in model_title:
        dash = '-'
    else:
        dash = '--'
    plt.plot(recall_total[ix], precision, linestyle=dash,
             label=model_title + ' - AUC={:0.2f}'.format(auc_list[ix][0]))

plt.title('Precision-Recall Curve: Obesity Predicted at 5 years', fontsize = 18)
plt.xlabel('Recall (Sensitivity)', fontsize = 14)
plt.ylabel('Precision (PPV)', fontsize = 14)
plt.legend(fontsize = 8)
plt.axis('equal')
plt.grid()
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()
In [28]:
# Two side-by-side precision-recall panels: titles containing 'girls' on
# the left, titles containing 'boys' on the right.  Titles carrying any tag
# in dropped_tags are left out of both panels.
dropped_tags = ('w/o vitals','Wt','no_maternal','w/o exclusions', 'BMI','gradientboost')
panels = (
    ('girls', 'Girls Precision-Recall Curve: Obesity Predicted at 5 years'),
    ('boys', 'Boys Precision-Recall Curve: Obesity Predicted at 5 years'),
)

plt.figure(figsize=(18,9))
for position, (gender, heading) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for ix, precision in enumerate(prec_total):
        model_title = titles_total[ix]
        if gender in model_title and not any(tag in model_title for tag in dropped_tags):
            plt.plot(recall_total[ix], precision, linestyle='-',
                     label=model_title + ' - AUC={:0.2f}'.format(auc_list[ix][0]))
    # Only the left panel carries the y label and only the right panel the x label.
    if position == 1:
        plt.ylabel('Precision (PPV)', fontsize = 14)
    else:
        plt.xlabel('Recall (Sensitivity)', fontsize = 14)
    plt.legend(fontsize = 8, loc=8)
    plt.axis('equal')
    plt.title(heading, fontsize = 18)
    plt.grid()
    plt.tight_layout()

# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()

Results for the best model at each prediction age

In [30]:
# ROC curve for every model listed in top_ix; the third element of each
# top_ix entry is the model's position in the *_total lists.
# top_ix is assembled in the "All Top Models" cell (In [35]), so that cell
# has to run before this one.
plt.figure(figsize=(9,9))
for entry in top_ix:
    model_ix = entry[2]
    model_title = titles_total[model_ix]
    dash = '-' if 'girls' in model_title else '--'
    plt.plot(1 - np.array(spec_total[model_ix]), np.array(recall_total[model_ix]),
             linestyle=dash,
             label=model_title + ' - AUC={:0.2f}'.format(auc_list[model_ix][0]))

plt.title('ROC Curve: Obesity Predicted at 5 years', fontsize=18)
plt.xlabel('1 - Specificity', fontsize=14)
plt.ylabel('Sensitivity', fontsize=14)
plt.legend(fontsize = 10)
plt.axis('equal')
plt.grid(True)
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_ROC.png', dpi=300)
plt.show()
In [31]:
# Precision-recall curve for every model listed in top_ix (built in the
# "All Top Models" cell, In [35]).  Element 2 of each entry indexes the
# *_total lists.  'girls' titles are solid, the rest dashed.
plt.figure(figsize=(9,9))
for entry in top_ix:
    model_ix = entry[2]
    model_title = titles_total[model_ix]
    if 'girls' in model_title:
        dash = '-'
    else:
        dash = '--'
    curve_label = model_title + ' - AUC={:0.2f}'.format(auc_list[model_ix][0])
    plt.plot(recall_total[model_ix], prec_total[model_ix], linestyle=dash, label=curve_label)

plt.title('Precision-Recall Curve: Obesity Predicted at 5 years', fontsize=18)
plt.xlabel('Recall (Sensitivity)', fontsize=14)
plt.ylabel('Precision (PPV)', fontsize=14)
plt.legend(fontsize = 10)
plt.axis('equal')
plt.grid()
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()
In [32]:
# Side-by-side precision-recall panels for the models listed in top_ix
# (built in the "All Top Models" cell, In [35]): 'girls' titles on the left,
# 'boys' titles on the right.  Element 2 of each entry indexes the *_total lists.
panels = (
    ('girls', 'Girls Precision-Recall Curve: Obesity Predicted at 5 years'),
    ('boys', 'Boys Precision-Recall Curve: Obesity Predicted at 5 years'),
)

plt.figure(figsize=(20,10))
for position, (gender, heading) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for entry in top_ix:
        model_ix = entry[2]
        model_title = titles_total[model_ix]
        if gender in model_title:
            plt.plot(recall_total[model_ix], prec_total[model_ix], linestyle='-',
                     label=model_title + ' - AUC={:0.2f}'.format(auc_list[model_ix][0]))
    # Only the left panel carries the y label and only the right panel the x label.
    if position == 1:
        plt.ylabel('Precision (PPV)', fontsize = 14)
    else:
        plt.xlabel('Recall (Sensitivity)', fontsize = 14)
    plt.legend(fontsize=10, loc=8)
    plt.axis('equal')
    plt.title(heading, fontsize = 18)
    plt.grid()
    plt.tight_layout()

# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()

All Top Models

In [35]:
# Tabulate every model (position, title, and the two elements of its
# auc_list entry), then for each prediction age and gender pick the
# highest-AUC row among the 'w/o exclusions' titles that are not tagged
# 'no_maternal'.  The winners are collected in top_ix as
# [age, gender, model position, shortened title].
headers=['ix','title','auc','auc ste']
df_l = [
    [position, model_title, scores[0], scores[1]]
    for position, (scores, model_title) in enumerate(zip(auc_list, titles_total))
]
df = pd.DataFrame(df_l, columns=headers)

top_ix = []
for age in ['@ 6','@ 12','@ 18','@ 24','@ 36','@ 48']:
    for g in ['boys','girls']:
        all_titles = df.title
        keep = (
            all_titles.str.contains(age)
            & all_titles.str.contains(g)
            & all_titles.str.contains('w/o exclusions')
            & ~all_titles.str.contains('no_maternal')
        )
        filtered = df[keep].sort_values(by='auc', axis=0, ascending=False)
        print(filtered.head(3))
        # Row 0 is the best model after the descending sort; column 0 is 'ix', column 1 is 'title'.
        best_position = filtered.iloc[0,0]
        name = filtered.iloc[0,1].replace(" maternal w/o exclusions - model: ", " ")
        top_ix.append([age, g, best_position, name])
    ix                                              title       auc   auc ste
21  21  boys maternal w/o exclusions - model: randomfo...  0.642392  0.018267
5    5  boys maternal w/o exclusions - model: lasso@ 6...  0.633844  0.016899
37  37  boys maternal w/o exclusions - model: gradient...  0.630358  0.015649
    ix                                              title       auc   auc ste
13  13  girls maternal w/o exclusions - model: lasso@ ...  0.692385  0.020549
29  29  girls maternal w/o exclusions - model: randomf...  0.677942  0.015822
45  45  girls maternal w/o exclusions - model: gradien...  0.669962  0.017309
    ix                                              title       auc   auc ste
85  85  boys maternal w/o exclusions - model: gradient...  0.680082  0.014816
69  69  boys maternal w/o exclusions - model: randomfo...  0.672767  0.011746
53  53  boys maternal w/o exclusions - model: lasso@ 1...  0.670870  0.010762
    ix                                              title       auc   auc ste
77  77  girls maternal w/o exclusions - model: randomf...  0.726243  0.013140
93  93  girls maternal w/o exclusions - model: gradien...  0.726203  0.014749
61  61  girls maternal w/o exclusions - model: lasso@ ...  0.720417  0.017287
      ix                                              title       auc  \
133  133  boys maternal w/o exclusions - model: gradient...  0.721950   
117  117  boys maternal w/o exclusions - model: randomfo...  0.720265   
101  101  boys maternal w/o exclusions - model: lasso@ 1...  0.719445   

      auc ste  
133  0.010960  
117  0.012022  
101  0.011076  
      ix                                              title       auc  \
125  125  girls maternal w/o exclusions - model: randomf...  0.779235   
109  109  girls maternal w/o exclusions - model: lasso@ ...  0.768030   
141  141  girls maternal w/o exclusions - model: gradien...  0.759643   

      auc ste  
125  0.008353  
109  0.012006  
141  0.013372  
      ix                                              title       auc  \
181  181  boys maternal w/o exclusions - model: gradient...  0.767275   
149  149  boys maternal w/o exclusions - model: lasso@ 2...  0.758447   
165  165  boys maternal w/o exclusions - model: randomfo...  0.757014   

      auc ste  
181  0.011424  
149  0.009043  
165  0.011258  
      ix                                              title       auc  \
189  189  girls maternal w/o exclusions - model: gradien...  0.806009   
157  157  girls maternal w/o exclusions - model: lasso@ ...  0.805645   
173  173  girls maternal w/o exclusions - model: randomf...  0.803393   

      auc ste  
189  0.010217  
157  0.009200  
173  0.005738  
      ix                                              title       auc  \
197  197  boys maternal w/o exclusions - model: lasso@ 3...  0.820958   
229  229  boys maternal w/o exclusions - model: gradient...  0.819836   
213  213  boys maternal w/o exclusions - model: randomfo...  0.818687   

      auc ste  
197  0.011484  
229  0.009750  
213  0.006361  
      ix                                              title       auc  \
205  205  girls maternal w/o exclusions - model: lasso@ ...  0.885750   
221  221  girls maternal w/o exclusions - model: randomf...  0.878544   
237  237  girls maternal w/o exclusions - model: gradien...  0.872451   

      auc ste  
205  0.010014  
221  0.006395  
237  0.003827  
      ix                                              title       auc  \
277  277  boys maternal w/o exclusions - model: gradient...  0.895406   
261  261  boys maternal w/o exclusions - model: randomfo...  0.893322   
245  245  boys maternal w/o exclusions - model: lasso@ 4...  0.887492   

      auc ste  
277  0.006081  
261  0.006928  
245  0.007179  
      ix                                              title       auc  \
253  253  girls maternal w/o exclusions - model: lasso@ ...  0.925774   
285  285  girls maternal w/o exclusions - model: gradien...  0.921913   
269  269  girls maternal w/o exclusions - model: randomf...  0.914098   

      auc ste  
253  0.006551  
285  0.003646  
269  0.003308  

Comparison of Mary Jo's Inclusion Criteria to Pediatric Study

  • Using the best model for boys and girls where prediction is at 24 months
  • Original: base parameters for the pediatric study where no inclusion criteria are used
  • Inclusion: uses Mary Jo's inclusion criteria
    • Age of mother at birth is at least 18 years old
    • No complications with the child delivery
    • Mother is Hispanic/Latina
  • Modified Inclusion: the same as above but all ethnicities are allowed
In [40]:
# ROC curves for the entries of titles_24.  The line style is chosen from
# the lower-cased title: 'original' -> solid, 'inclusion mod' -> dotted,
# anything else -> dashed.
plt.figure(figsize=(9,9))
for i, title in enumerate(titles_24):
    lowered = title.lower()
    if 'original' in lowered:
        dash = '-'
    elif 'inclusion mod' in lowered:
        dash = ':'
    else:
        dash = '--'
    plt.plot(1 - np.array(spec_24[i]), np.array(recall_24[i]), linestyle=dash,
             label=title + ' - AUC={:0.2f}'.format(auc_24[i]))

plt.title('ROC Curve: Obesity Predicted at 5 years', fontsize=18)
plt.xlabel('1 - Specificity', fontsize=14)
plt.ylabel('Sensitivity', fontsize=14)
plt.legend(fontsize = 10)
plt.axis('equal')
plt.grid(True)
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_ROC.png', dpi=300)
plt.show()
In [43]:
# Precision-recall curves for the entries of titles_24.  The line style is
# chosen from the lower-cased title: 'original' -> solid,
# 'inclusion mod' -> dotted, anything else -> dashed.
style_by_marker = (('original', '-'), ('inclusion mod', ':'))

plt.figure(figsize=(9,9))
for i, title in enumerate(titles_24):
    lowered = title.lower()
    # First matching marker wins; titles matching neither fall back to dashed.
    dash = next((style for marker, style in style_by_marker if marker in lowered), '--')
    curve_label = title + ' - AUC={:0.2f}'.format(auc_24[i])
    plt.plot(recall_24[i], prec_24[i], linestyle=dash, label=curve_label)

plt.title('Precision-Recall Curve: Obesity Predicted at 5 years', fontsize=18)
plt.xlabel('Recall (Sensitivity)', fontsize=14)
plt.ylabel('Precision (PPV)', fontsize=14)
plt.legend(fontsize = 10)
plt.axis('equal')
plt.grid()
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()

Comparison of Mary Jo's Inclusion Criteria to Pediatric Study

  • Using the best model for each prediction age for boys and girls
  • Original: base parameters for the pediatric study where no inclusion criteria are used
  • Inclusion: uses Mary Jo's inclusion criteria
    • Age of mother at birth is at least 18 years old
    • No complications with the child delivery
    • Mother is Hispanic/Latina
  • Modified Inclusion: the same as above but all ethnicities are allowed
In [47]:
# Side-by-side ROC panels for the entries of titles_best: titles containing
# 'girls' on the left, titles containing 'boys' on the right.  Within a
# panel the line style comes from the lower-cased title:
# 'original' -> solid, 'mod.' -> dotted, anything else -> dashed.
panels = (
    ('girls', 'Girls ROC Curve: Obesity Predicted at 5 years'),
    ('boys', 'Boys ROC Curve: Obesity Predicted at 5 years'),
)

plt.figure(figsize=(20,10))
for position, (gender, heading) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for i, title in enumerate(titles_best):
        # Fix: the boys panel used to test titles_total[i] here, i.e. a
        # different list than the one being enumerated and plotted.  Both
        # panels now filter on the titles_best entry itself.
        if gender not in title:
            continue
        lowered = title.lower()
        if 'original' in lowered:
            dash = '-'
        elif 'mod.' in lowered:
            dash = ':'
        else:
            dash = '--'
        plt.plot(1 - np.array(spec_best[i]), np.array(recall_best[i]), linestyle=dash,
                 label=title + ' - AUC={:0.2f}'.format(auc_best[i]))
    plt.legend(fontsize = 10)
    # As before, only the left (girls) panel carries the y-axis label.
    if position == 1:
        plt.ylabel('Sensitivity', fontsize=14)
    plt.xlabel('1 - Specificity', fontsize=14)
    plt.axis('equal')
    plt.title(heading, fontsize=18)
    plt.grid(True)
    plt.tight_layout()

# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()
In [49]:
# Side-by-side precision-recall panels for the entries of titles_best:
# titles containing 'girls' on the left, titles containing 'boys' on the
# right.  Line style comes from the lower-cased title:
# 'original' -> solid, 'mod.' -> dotted, anything else -> dashed.
panels = (
    ('girls', 'Girls Precision-Recall Curve: Obesity Predicted at 5 years'),
    ('boys', 'Boys Precision-Recall Curve: Obesity Predicted at 5 years'),
)

plt.figure(figsize=(20,10))
for position, (gender, heading) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for i, title in enumerate(titles_best):
        if gender not in title:
            continue
        lowered = title.lower()
        if 'original' in lowered:
            dash = '-'
        elif 'mod.' in lowered:
            dash = ':'
        else:
            dash = '--'
        plt.plot(recall_best[i], prec_best[i], linestyle=dash,
                 label=title + ' - AUC={:0.2f}'.format(auc_best[i]))
    # Only the left (girls) panel carries the y-axis label.
    if position == 1:
        plt.ylabel('Precision (PPV)', fontsize=14)
    plt.xlabel('Recall (Sensitivity)', fontsize=14)
    plt.legend(fontsize = 8)
    plt.axis('equal')
    plt.title(heading, fontsize=18)
    plt.grid()
    plt.tight_layout()

# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()

Next Steps

  • Rerun analyses at each of the prediction time points with:
    • all data
    • all data excluding weight and BMI readings
    • all data excluding census data
    • predict for extreme obesity (99%?)
  • Update address data from the most recent data release
    • Will need to be geocoded
    • Census features for at birth, 2 years old (or month we are predicting from), 1 year before birth?
  • Report back on results with cleaned up plots